Loading Libraries
# Accessing Libraries
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(broom)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
Loading Data Set
# Loading in Data Files
phouses <- read_csv("cities-month-SA.csv")
## Rows: 295 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (23): AZ-Phoenix, CA-Los Angeles, CA-San Diego, CA-San Francisco, CO-De...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mortperf <- read_csv("nmdb-mortgage-performance-statistics-metros-quarterly.csv")
## Rows: 150400 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): SOURCE, FREQUENCY, GEOLEVEL, GEONAME, MARKET, PERIOD, MONTH, SERIESID
## dbl (5): GEOID, YEAR, QUARTER, SUPPRESSED, VALUE1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Cleaning Data
city_map <- data.frame(
phouses_city = c("AZ-Phoenix", "CA-Los Angeles", "CA-San Diego", "CA-San Francisco",
"CO-Denver", "DC-Washington", "FL-Miami", "FL-Tampa", "GA-Atlanta",
"IL-Chicago", "MN-Minneapolis", "NC-Charlotte", "NV-Las Vegas",
"NY-New York", "OH-Cleveland", "OR-Portland", "TX-Dallas", "WA-Seattle"),
geoname_pattern = c("Phoenix", "Los Angeles", "San Diego", "San Francisco",
"Denver", "Washington", "Miami", "Tampa", "Atlanta",
"Chicago", "Minneapolis", "Charlotte", "Las Vegas",
"New York", "Cleveland", "Portland", "Dallas", "Seattle")
) # Comparing lists so that only these cities (which appear in both lists) are accounted for
# Changing
mortperf$GEONAME <- as.character(mortperf$GEONAME)
mortperf$city <- NA
for (i in seq_len(nrow(city_map))) {
pattern <- city_map$geoname_pattern[i]
city_name <- city_map$phouses_city[i]
match_rows <- grepl(pattern, mortperf$GEONAME, ignore.case = TRUE)
mortperf$city[match_rows] <- city_name
}
mortperf_filtered <- mortperf[!is.na(mortperf$city), ]
Combining the data sets
# Housing Data from wide to long (cities are columns)
phousesl <- phouses %>%
pivot_longer(
cols = -Date,
names_to = "city",
values_to = "house_value"
)
# selecing only relevant mortgage data
mortperfs <- mortperf_filtered %>%
select(city, PERIOD, VALUE1, SERIESID) %>%
rename(mortgage_value = VALUE1)
# Changing date realated data to different types for easier merging
phousesl <- phousesl %>%
mutate(Date = as.Date(Date),
year = format(Date, "%Y"),
quarter = ceiling(as.numeric(format(Date, "%m")) / 3),
period = paste0(year, "Q", quarter))
# combining both data sets
combined <- merge(
phousesl,
mortperfs,
by.x = c("city", "period"),
by.y = c("city", "PERIOD"),
all = FALSE
)
Methods/Assumptions:
# Shapiro-Wilk test for normality
norm_city <- combined %>%
group_by(city) %>%
summarize(
shapiro = shapiro.test(house_value)$p.value,
n = n()
)
print(norm_city)
## # A tibble: 18 × 3
## city shapiro n
## <chr> <dbl> <int>
## 1 AZ-Phoenix 4.22e-24 1840
## 2 CA-Los Angeles 3.73e-37 1840
## 3 CA-San Diego 3.56e-37 1840
## 4 CA-San Francisco 2.08e-36 1840
## 5 CO-Denver 2.34e-38 1840
## 6 DC-Washington 2.84e-36 1840
## 7 FL-Miami 6.87e-37 1840
## 8 FL-Tampa 8.02e-28 1840
## 9 GA-Atlanta 8.25e-39 1840
## 10 IL-Chicago 6.28e-29 1840
## 11 MN-Minneapolis 1.71e-32 1840
## 12 NC-Charlotte 1.25e-38 1840
## 13 NV-Las Vegas 1.25e-36 1840
## 14 NY-New York 2.48e-31 1840
## 15 OH-Cleveland 9.98e-38 1840
## 16 OR-Portland 1.64e-30 1840
## 17 TX-Dallas 6.05e-32 1840
## 18 WA-Seattle 8.16e-24 1840
# Levene's test for equal variance
leveneTest(house_value ~ city, data = combined)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 17 735.52 < 2.2e-16 ***
## 33102
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Kruskal-Wallis test and One Way ANOVA test
# Non Parametric Version of a one Way Anova
kruskal.test(house_value ~ city, data = combined)
##
## Kruskal-Wallis rank sum test
##
## data: house_value by city
## Kruskal-Wallis chi-squared = 20061, df = 17, p-value < 2.2e-16
oneway.test(house_value ~ city, data = combined, var.equal = FALSE)
##
## One-way analysis of means (not assuming equal variances)
##
## data: house_value and city
## F = 10286, num df = 17, denom df = 12190, p-value < 2.2e-16
Emmeans City Comparison
# Linear Model
model <- lm(house_value ~ mortgage_value * city, data = combined)
# City by City Comparison analysis
emtrends(model, pairwise ~ city, var = "mortgage_value")
## $emtrends
## city mortgage_value.trend SE df lower.CL upper.CL
## AZ-Phoenix -1.4581 0.335 24471 -2.114 -0.802
## CA-Los Angeles -5.8718 0.376 24471 -6.608 -5.135
## CA-San Diego 1.5758 0.459 24471 0.677 2.475
## CA-San Francisco 26.7542 0.817 24471 25.153 28.355
## CO-Denver -1.8996 0.573 24471 -3.023 -0.776
## DC-Washington -3.0374 0.356 24471 -3.735 -2.340
## FL-Miami -4.0559 0.192 24471 -4.432 -3.680
## FL-Tampa -0.0129 0.310 24471 -0.621 0.595
## GA-Atlanta -4.7086 0.334 24471 -5.364 -4.054
## IL-Chicago -0.0377 0.353 24471 -0.729 0.653
## MN-Minneapolis -0.6823 0.585 24471 -1.829 0.465
## NC-Charlotte 6.9644 0.412 24471 6.157 7.772
## NV-Las Vegas -3.8346 0.237 24471 -4.299 -3.371
## NY-New York -3.7989 0.370 24471 -4.524 -3.074
## OH-Cleveland -3.1681 0.441 24471 -4.032 -2.304
## OR-Portland -4.4882 0.556 24471 -5.577 -3.399
## TX-Dallas -0.1548 0.427 24471 -0.992 0.683
## WA-Seattle -1.9079 0.612 24471 -3.107 -0.709
##
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## (AZ-Phoenix) - (CA-Los Angeles) 4.41372 0.503 24471 8.771 <.0001
## (AZ-Phoenix) - (CA-San Diego) -3.03386 0.568 24471 -5.344 <.0001
## (AZ-Phoenix) - (CA-San Francisco) -28.21230 0.883 24471 -31.966 <.0001
## (AZ-Phoenix) - (CO-Denver) 0.44154 0.664 24471 0.665 1.0000
## (AZ-Phoenix) - (DC-Washington) 1.57936 0.488 24471 3.234 0.1083
## (AZ-Phoenix) - (FL-Miami) 2.59778 0.386 24471 6.736 <.0001
## (AZ-Phoenix) - (FL-Tampa) -1.44515 0.456 24471 -3.167 0.1302
## (AZ-Phoenix) - (GA-Atlanta) 3.25049 0.473 24471 6.873 <.0001
## (AZ-Phoenix) - (IL-Chicago) -1.42036 0.486 24471 -2.922 0.2402
## (AZ-Phoenix) - (MN-Minneapolis) -0.77582 0.674 24471 -1.151 0.9996
## (AZ-Phoenix) - (NC-Charlotte) -8.42251 0.531 24471 -15.869 <.0001
## (AZ-Phoenix) - (NV-Las Vegas) 2.37650 0.410 24471 5.798 <.0001
## (AZ-Phoenix) - (NY-New York) 2.34077 0.499 24471 4.693 0.0004
## (AZ-Phoenix) - (OH-Cleveland) 1.71000 0.554 24471 3.089 0.1599
## (AZ-Phoenix) - (OR-Portland) 3.03014 0.649 24471 4.671 0.0004
## (AZ-Phoenix) - (TX-Dallas) -1.30324 0.543 24471 -2.401 0.6072
## (AZ-Phoenix) - (WA-Seattle) 0.44981 0.697 24471 0.645 1.0000
## (CA-Los Angeles) - (CA-San Diego) -7.44759 0.593 24471 -12.562 <.0001
## (CA-Los Angeles) - (CA-San Francisco) -32.62602 0.899 24471 -36.292 <.0001
## (CA-Los Angeles) - (CO-Denver) -3.97219 0.685 24471 -5.795 <.0001
## (CA-Los Angeles) - (DC-Washington) -2.83436 0.517 24471 -5.478 <.0001
## (CA-Los Angeles) - (FL-Miami) -1.81595 0.422 24471 -4.305 0.0023
## (CA-Los Angeles) - (FL-Tampa) -5.85887 0.487 24471 -12.024 <.0001
## (CA-Los Angeles) - (GA-Atlanta) -1.16323 0.503 24471 -2.313 0.6739
## (CA-Los Angeles) - (IL-Chicago) -5.83409 0.515 24471 -11.322 <.0001
## (CA-Los Angeles) - (MN-Minneapolis) -5.18954 0.695 24471 -7.463 <.0001
## (CA-Los Angeles) - (NC-Charlotte) -12.83624 0.558 24471 -23.021 <.0001
## (CA-Los Angeles) - (NV-Las Vegas) -2.03722 0.444 24471 -4.587 0.0006
## (CA-Los Angeles) - (NY-New York) -2.07295 0.527 24471 -3.932 0.0104
## (CA-Los Angeles) - (OH-Cleveland) -2.70372 0.579 24471 -4.667 0.0004
## (CA-Los Angeles) - (OR-Portland) -1.38358 0.671 24471 -2.062 0.8379
## (CA-Los Angeles) - (TX-Dallas) -5.71697 0.569 24471 -10.046 <.0001
## (CA-Los Angeles) - (WA-Seattle) -3.96392 0.718 24471 -5.522 <.0001
## (CA-San Diego) - (CA-San Francisco) -25.17843 0.937 24471 -26.883 <.0001
## (CA-San Diego) - (CO-Denver) 3.47540 0.734 24471 4.734 0.0003
## (CA-San Diego) - (DC-Washington) 4.61323 0.580 24471 7.949 <.0001
## (CA-San Diego) - (FL-Miami) 5.63164 0.497 24471 11.332 <.0001
## (CA-San Diego) - (FL-Tampa) 1.58871 0.554 24471 2.870 0.2698
## (CA-San Diego) - (GA-Atlanta) 6.28435 0.567 24471 11.076 <.0001
## (CA-San Diego) - (IL-Chicago) 1.61350 0.578 24471 2.790 0.3195
## (CA-San Diego) - (MN-Minneapolis) 2.25804 0.743 24471 3.037 0.1824
## (CA-San Diego) - (NC-Charlotte) -5.38865 0.616 24471 -8.742 <.0001
## (CA-San Diego) - (NV-Las Vegas) 5.41036 0.516 24471 10.484 <.0001
## (CA-San Diego) - (NY-New York) 5.37464 0.589 24471 9.124 <.0001
## (CA-San Diego) - (OH-Cleveland) 4.74386 0.636 24471 7.458 <.0001
## (CA-San Diego) - (OR-Portland) 6.06401 0.720 24471 8.417 <.0001
## (CA-San Diego) - (TX-Dallas) 1.73062 0.627 24471 2.761 0.3382
## (CA-San Diego) - (WA-Seattle) 3.48367 0.764 24471 4.557 0.0007
## (CA-San Francisco) - (CO-Denver) 28.65383 0.998 24471 28.717 <.0001
## (CA-San Francisco) - (DC-Washington) 29.79166 0.891 24471 33.444 <.0001
## (CA-San Francisco) - (FL-Miami) 30.81007 0.839 24471 36.728 <.0001
## (CA-San Francisco) - (FL-Tampa) 26.76715 0.874 24471 30.640 <.0001
## (CA-San Francisco) - (GA-Atlanta) 31.46279 0.882 24471 35.656 <.0001
## (CA-San Francisco) - (IL-Chicago) 26.79193 0.890 24471 30.119 <.0001
## (CA-San Francisco) - (MN-Minneapolis) 27.43648 1.000 24471 27.309 <.0001
## (CA-San Francisco) - (NC-Charlotte) 19.78978 0.915 24471 21.636 <.0001
## (CA-San Francisco) - (NV-Las Vegas) 30.58880 0.850 24471 35.974 <.0001
## (CA-San Francisco) - (NY-New York) 30.55307 0.897 24471 34.080 <.0001
## (CA-San Francisco) - (OH-Cleveland) 29.92230 0.928 24471 32.241 <.0001
## (CA-San Francisco) - (OR-Portland) 31.24244 0.988 24471 31.628 <.0001
## (CA-San Francisco) - (TX-Dallas) 26.90905 0.922 24471 29.194 <.0001
## (CA-San Francisco) - (WA-Seattle) 28.66210 1.020 24471 28.092 <.0001
## (CO-Denver) - (DC-Washington) 1.13783 0.675 24471 1.686 0.9694
## (CO-Denver) - (FL-Miami) 2.15624 0.604 24471 3.567 0.0387
## (CO-Denver) - (FL-Tampa) -1.88669 0.652 24471 -2.894 0.2555
## (CO-Denver) - (GA-Atlanta) 2.80896 0.664 24471 4.233 0.0031
## (CO-Denver) - (IL-Chicago) -1.86190 0.673 24471 -2.766 0.3346
## (CO-Denver) - (MN-Minneapolis) -1.21736 0.819 24471 -1.486 0.9917
## (CO-Denver) - (NC-Charlotte) -8.86405 0.706 24471 -12.556 <.0001
## (CO-Denver) - (NV-Las Vegas) 1.93497 0.620 24471 3.120 0.1478
## (CO-Denver) - (NY-New York) 1.89924 0.682 24471 2.784 0.3231
## (CO-Denver) - (OH-Cleveland) 1.26846 0.723 24471 1.754 0.9559
## (CO-Denver) - (OR-Portland) 2.58861 0.798 24471 3.242 0.1057
## (CO-Denver) - (TX-Dallas) -1.74478 0.715 24471 -2.440 0.5768
## (CO-Denver) - (WA-Seattle) 0.00827 0.838 24471 0.010 1.0000
## (DC-Washington) - (FL-Miami) 1.01841 0.404 24471 2.520 0.5144
## (DC-Washington) - (FL-Tampa) -3.02451 0.472 24471 -6.408 <.0001
## (DC-Washington) - (GA-Atlanta) 1.67113 0.488 24471 3.424 0.0615
## (DC-Washington) - (IL-Chicago) -2.99973 0.501 24471 -5.989 <.0001
## (DC-Washington) - (MN-Minneapolis) -2.35518 0.685 24471 -3.439 0.0586
## (DC-Washington) - (NC-Charlotte) -10.00188 0.544 24471 -18.376 <.0001
## (DC-Washington) - (NV-Las Vegas) 0.79714 0.427 24471 1.866 0.9245
## (DC-Washington) - (NY-New York) 0.76141 0.513 24471 1.484 0.9918
## (DC-Washington) - (OH-Cleveland) 0.13064 0.567 24471 0.231 1.0000
## (DC-Washington) - (OR-Portland) 1.45078 0.660 24471 2.199 0.7548
## (DC-Washington) - (TX-Dallas) -2.88261 0.556 24471 -5.184 <.0001
## (DC-Washington) - (WA-Seattle) -1.12956 0.708 24471 -1.596 0.9822
## (FL-Miami) - (FL-Tampa) -4.04293 0.365 24471 -11.088 <.0001
## (FL-Miami) - (GA-Atlanta) 0.65271 0.385 24471 1.694 0.9680
## (FL-Miami) - (IL-Chicago) -4.01814 0.401 24471 -10.013 <.0001
## (FL-Miami) - (MN-Minneapolis) -3.37360 0.616 24471 -5.479 <.0001
## (FL-Miami) - (NC-Charlotte) -11.02029 0.454 24471 -24.255 <.0001
## (FL-Miami) - (NV-Las Vegas) -0.22128 0.305 24471 -0.726 1.0000
## (FL-Miami) - (NY-New York) -0.25700 0.417 24471 -0.617 1.0000
## (FL-Miami) - (OH-Cleveland) -0.88778 0.481 24471 -1.847 0.9307
## (FL-Miami) - (OR-Portland) 0.43237 0.588 24471 0.736 1.0000
## (FL-Miami) - (TX-Dallas) -3.90102 0.468 24471 -8.329 <.0001
## (FL-Miami) - (WA-Seattle) -2.14797 0.641 24471 -3.351 0.0768
## (FL-Tampa) - (GA-Atlanta) 4.69564 0.456 24471 10.298 <.0001
## (FL-Tampa) - (IL-Chicago) 0.02479 0.470 24471 0.053 1.0000
## (FL-Tampa) - (MN-Minneapolis) 0.66933 0.662 24471 1.011 0.9999
## (FL-Tampa) - (NC-Charlotte) -6.97736 0.516 24471 -13.531 <.0001
## (FL-Tampa) - (NV-Las Vegas) 3.82165 0.390 24471 9.794 <.0001
## (FL-Tampa) - (NY-New York) 3.78592 0.483 24471 7.844 <.0001
## (FL-Tampa) - (OH-Cleveland) 3.15515 0.539 24471 5.853 <.0001
## (FL-Tampa) - (OR-Portland) 4.47529 0.636 24471 7.032 <.0001
## (FL-Tampa) - (TX-Dallas) 0.14191 0.528 24471 0.269 1.0000
## (FL-Tampa) - (WA-Seattle) 1.89496 0.686 24471 2.763 0.3368
## (GA-Atlanta) - (IL-Chicago) -4.67085 0.486 24471 -9.615 <.0001
## (GA-Atlanta) - (MN-Minneapolis) -4.02631 0.674 24471 -5.975 <.0001
## (GA-Atlanta) - (NC-Charlotte) -11.67301 0.530 24471 -22.006 <.0001
## (GA-Atlanta) - (NV-Las Vegas) -0.87399 0.410 24471 -2.134 0.7963
## (GA-Atlanta) - (NY-New York) -0.90972 0.498 24471 -1.825 0.9372
## (GA-Atlanta) - (OH-Cleveland) -1.54049 0.553 24471 -2.785 0.3227
## (GA-Atlanta) - (OR-Portland) -0.22035 0.648 24471 -0.340 1.0000
## (GA-Atlanta) - (TX-Dallas) -4.55373 0.542 24471 -8.394 <.0001
## (GA-Atlanta) - (WA-Seattle) -2.80068 0.697 24471 -4.019 0.0074
## (IL-Chicago) - (MN-Minneapolis) 0.64454 0.683 24471 0.943 1.0000
## (IL-Chicago) - (NC-Charlotte) -7.00215 0.542 24471 -12.914 <.0001
## (IL-Chicago) - (NV-Las Vegas) 3.79686 0.425 24471 8.941 <.0001
## (IL-Chicago) - (NY-New York) 3.76114 0.511 24471 7.361 <.0001
## (IL-Chicago) - (OH-Cleveland) 3.13036 0.565 24471 5.545 <.0001
## (IL-Chicago) - (OR-Portland) 4.45051 0.658 24471 6.762 <.0001
## (IL-Chicago) - (TX-Dallas) 0.11712 0.554 24471 0.211 1.0000
## (IL-Chicago) - (WA-Seattle) 1.87017 0.706 24471 2.649 0.4169
## (MN-Minneapolis) - (NC-Charlotte) -7.64669 0.716 24471 -10.686 <.0001
## (MN-Minneapolis) - (NV-Las Vegas) 3.15232 0.631 24471 4.994 0.0001
## (MN-Minneapolis) - (NY-New York) 3.11659 0.692 24471 4.502 0.0009
## (MN-Minneapolis) - (OH-Cleveland) 2.48582 0.733 24471 3.393 0.0677
## (MN-Minneapolis) - (OR-Portland) 3.80596 0.807 24471 4.716 0.0003
## (MN-Minneapolis) - (TX-Dallas) -0.52742 0.725 24471 -0.728 1.0000
## (MN-Minneapolis) - (WA-Seattle) 1.22563 0.846 24471 1.448 0.9938
## (NC-Charlotte) - (NV-Las Vegas) 10.79902 0.475 24471 22.729 <.0001
## (NC-Charlotte) - (NY-New York) 10.76329 0.554 24471 19.443 <.0001
## (NC-Charlotte) - (OH-Cleveland) 10.13252 0.603 24471 16.793 <.0001
## (NC-Charlotte) - (OR-Portland) 11.45266 0.692 24471 16.556 <.0001
## (NC-Charlotte) - (TX-Dallas) 7.11927 0.594 24471 11.994 <.0001
## (NC-Charlotte) - (WA-Seattle) 8.87232 0.737 24471 12.032 <.0001
## (NV-Las Vegas) - (NY-New York) -0.03573 0.439 24471 -0.081 1.0000
## (NV-Las Vegas) - (OH-Cleveland) -0.66650 0.500 24471 -1.332 0.9976
## (NV-Las Vegas) - (OR-Portland) 0.65364 0.604 24471 1.082 0.9998
## (NV-Las Vegas) - (TX-Dallas) -3.67974 0.489 24471 -7.532 <.0001
## (NV-Las Vegas) - (WA-Seattle) -1.92669 0.656 24471 -2.938 0.2316
## (NY-New York) - (OH-Cleveland) -0.63077 0.575 24471 -1.096 0.9998
## (NY-New York) - (OR-Portland) 0.68937 0.668 24471 1.033 0.9999
## (NY-New York) - (TX-Dallas) -3.64402 0.565 24471 -6.448 <.0001
## (NY-New York) - (WA-Seattle) -1.89097 0.715 24471 -2.646 0.4193
## (OH-Cleveland) - (OR-Portland) 1.32014 0.709 24471 1.861 0.9260
## (OH-Cleveland) - (TX-Dallas) -3.01324 0.614 24471 -4.907 0.0001
## (OH-Cleveland) - (WA-Seattle) -1.26019 0.754 24471 -1.671 0.9719
## (OR-Portland) - (TX-Dallas) -4.33339 0.701 24471 -6.181 <.0001
## (OR-Portland) - (WA-Seattle) -2.58034 0.826 24471 -3.123 0.1467
## (TX-Dallas) - (WA-Seattle) 1.75305 0.746 24471 2.350 0.6465
##
## P value adjustment: tukey method for comparing a family of 18 estimates
Fig 1:
# Boxplot, using median house value for geographical city
ggplot(combined, aes(x = city, y = house_value)) +
geom_boxplot(fill = "darkred", color = "gray30") +
labs(title = "House Value by City",
x = "City",
y = "House Value (Index)") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Creating Summary Statistics
combined %>%
group_by(city) %>%
summarise(
mean_house = mean(house_value, na.rm = TRUE),
median_house = median(house_value, na.rm = TRUE),
sd_house = sd(house_value, na.rm = TRUE),
q1 = quantile(house_value, 0.25, na.rm = TRUE),
q3 = quantile(house_value, 0.75, na.rm = TRUE),
iqr = IQR(house_value, na.rm = TRUE),
.groups = 'drop'
)
## # A tibble: 18 × 7
## city mean_house median_house sd_house q1 q3 iqr
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AZ-Phoenix 179. 173. 24.7 167. 201. 34.9
## 2 CA-Los Angeles 165. 152. 33.1 137. 203. 65.9
## 3 CA-San Diego 81.5 80.1 5.59 76.8 84.5 7.61
## 4 CA-San Francisco 240. 219. 50.7 198. 302. 104.
## 5 CO-Denver 189. 167. 51.7 146. 236. 90.1
## 6 DC-Washington 140. 133. 21.9 120. 163. 43.3
## 7 FL-Miami 190. 173. 39.5 156. 234. 78.5
## 8 FL-Tampa 147. 147. 24.0 127. 172. 45.1
## 9 GA-Atlanta 149. 132. 44.7 109. 192. 83.0
## 10 IL-Chicago 118. 117. 8.81 111. 126. 14.6
## 11 MN-Minneapolis 89.4 87.8 18.2 71.3 100. 29.1
## 12 NC-Charlotte 215. 193. 52.4 175. 286. 111.
## 13 NV-Las Vegas 167. 153. 37.9 137. 201. 64.5
## 14 NY-New York 198. 178. 45.6 166. 241. 75.0
## 15 OH-Cleveland 159. 143. 50.9 114. 215. 100.
## 16 OR-Portland 141. 139. 17.7 125. 158. 32.2
## 17 TX-Dallas 130. 128. 5.13 125. 135. 9.57
## 18 WA-Seattle 161. 158. 12.4 152. 171. 18.9
ggsave("fig1.png")
## Saving 7 x 5 in image
Fig 2
# Running Liner Model for Each City
results <- combined %>%
group_by(city) %>%
do({
mod <- lm(house_value ~ mortgage_value, data = .)
tibble(
shapiro_p = if(nrow(.) >= 3 & nrow(.) <= 5000) shapiro.test(residuals(mod))$p.value else NA,
)
}) %>% ungroup()
print(results)
## # A tibble: 18 × 2
## city shapiro_p
## <chr> <dbl>
## 1 AZ-Phoenix 1.56e-20
## 2 CA-Los Angeles 4.68e-25
## 3 CA-San Diego 1.48e-29
## 4 CA-San Francisco 4.60e-21
## 5 CO-Denver 6.06e-33
## 6 DC-Washington 2.38e-23
## 7 FL-Miami 4.87e-21
## 8 FL-Tampa 3.00e-24
## 9 GA-Atlanta 1.57e-28
## 10 IL-Chicago 4.26e-25
## 11 MN-Minneapolis 9.02e-28
## 12 NC-Charlotte 3.98e-26
## 13 NV-Las Vegas 8.39e-24
## 14 NY-New York 9.86e-21
## 15 OH-Cleveland 2.82e-32
## 16 OR-Portland 7.31e-18
## 17 TX-Dallas 2.78e-27
## 18 WA-Seattle 6.37e-14
# Dignostics for San Francisco
mod <- lm(house_value ~ mortgage_value, data = filter(combined, city =="CA-San Francisco"))
plot(mod)




summary(mod)
##
## Call:
## lm(formula = house_value ~ mortgage_value, data = filter(combined,
## city == "CA-San Francisco"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -81.724 -28.181 -6.377 18.071 115.854
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 215.4733 1.5491 139.09 <2e-16 ***
## mortgage_value 26.7542 0.9956 26.87 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 39.47 on 1045 degrees of freedom
## (793 observations deleted due to missingness)
## Multiple R-squared: 0.4086, Adjusted R-squared: 0.4081
## F-statistic: 722.1 on 1 and 1045 DF, p-value: < 2.2e-16
print(mod)
##
## Call:
## lm(formula = house_value ~ mortgage_value, data = filter(combined,
## city == "CA-San Francisco"))
##
## Coefficients:
## (Intercept) mortgage_value
## 215.47 26.75
# Dignostics for Los Angeles
mod1 <- lm(house_value ~ mortgage_value, data = filter(combined, city =="CA-Los Angeles"))
plot(mod1)




summary(mod1)
##
## Call:
## lm(formula = house_value ~ mortgage_value, data = filter(combined,
## city == "CA-Los Angeles"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -53.882 -27.010 -6.538 31.512 64.344
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 178.4931 1.1263 158.48 <2e-16 ***
## mortgage_value -5.8718 0.3505 -16.75 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30.19 on 1378 degrees of freedom
## (460 observations deleted due to missingness)
## Multiple R-squared: 0.1692, Adjusted R-squared: 0.1686
## F-statistic: 280.7 on 1 and 1378 DF, p-value: < 2.2e-16
print(mod1)
##
## Call:
## lm(formula = house_value ~ mortgage_value, data = filter(combined,
## city == "CA-Los Angeles"))
##
## Coefficients:
## (Intercept) mortgage_value
## 178.493 -5.872
Fig 2 and 3
# Facuet Grid for Linear Model
ggplot(combined, aes(x = mortgage_value, y = house_value)) +
geom_point(alpha = 0.4, color = "steelblue") +
geom_smooth(method = "lm", se = FALSE, color = "darkred") +
facet_wrap(~ city, scales = "free_y", ncol = 4) +
labs(
title = "Mortgage Performance vs. House Value by City",
x = "Mortgage Performance",
y = "House Value (Index)"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8613 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 8613 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("fig2.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8613 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 8613 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Summary Statistics
combined %>%
group_by(city) %>%
summarise(
mean_mortgage = mean(mortgage_value, na.rm = TRUE),
mean_house = mean(house_value, na.rm = TRUE),
cor_house_mortgage = cor(house_value, mortgage_value),
n = n()
)
## # A tibble: 18 × 5
## city mean_mortgage mean_house cor_house_mortgage n
## <chr> <dbl> <dbl> <dbl> <int>
## 1 AZ-Phoenix 2.85 179. NA 1840
## 2 CA-Los Angeles 2.22 165. NA 1840
## 3 CA-San Diego 1.77 81.5 NA 1840
## 4 CA-San Francisco 0.959 240. NA 1840
## 5 CO-Denver 2.28 189. NA 1840
## 6 DC-Washington 3.00 140. NA 1840
## 7 FL-Miami 4.44 190. NA 1840
## 8 FL-Tampa 3.41 147. NA 1840
## 9 GA-Atlanta 3.69 149. NA 1840
## 10 IL-Chicago 3.20 118. NA 1840
## 11 MN-Minneapolis 1.94 89.4 NA 1840
## 12 NC-Charlotte 3.06 215. NA 1840
## 13 NV-Las Vegas 3.61 167. NA 1840
## 14 NY-New York 2.70 198. NA 1840
## 15 OH-Cleveland 3.52 159. NA 1840
## 16 OR-Portland 1.91 141. NA 1840
## 17 TX-Dallas 2.75 130. NA 1840
## 18 WA-Seattle 1.56 161. NA 1840
# Comparison of San Fransico and Los Angeles (both in the state of California)
combined %>%
filter(city %in% c("CA-San Francisco", "CA-Los Angeles")) %>%
ggplot(aes(x = mortgage_value, y = house_value, color = city)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", se = TRUE) +
labs(
title = "San Francisco vs Los Angeles House Value Trends",
x = "Mortgage Performance",
y = "House Value"
)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1253 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1253 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("fig3.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1253 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 1253 rows containing missing values or values outside the scale range
## (`geom_point()`).